LME

rm(list=ls())
#preventing startup messages and call pacage, if it daoesnt exsist, then install and call
suppressPackageStartupMessages(require(xts)) || {install.packages("xts"); require(xts)}
## [1] TRUE
suppressPackageStartupMessages(require(tidyverse)) || {install.packages("tidyverse"); require(tidyverse)}
## [1] TRUE
suppressPackageStartupMessages(require(rvest)) || {install.packages("rvest"); require(rvest)}
## [1] TRUE
suppressPackageStartupMessages(require(anytime)) || {install.packages("anytime"); require(anytime)}
## [1] TRUE
suppressPackageStartupMessages(require(lubridate)) || {install.packages("lubridate"); require(lubridate)}
## [1] TRUE
suppressPackageStartupMessages(require(mosaic)) || {install.packages("mosaic"); require(mosaic)}
## [1] TRUE
suppressPackageStartupMessages(require(stringr)) || {install.packages("stringr"); require(stringr)}
## [1] TRUE
suppressPackageStartupMessages(require(ggvis)) || {install.packages("ggvis"); require(ggvis)}
## [1] TRUE
suppressPackageStartupMessages(require(plotly)) || {install.packages("plotly"); require(plotly)}
## [1] TRUE
suppressPackageStartupMessages(require(dygraphs)) || {install.packages("dygraphs"); require(dygraphs)}
## [1] TRUE
#require(data.table)
#library(quantmod)
#Aluminium<-readRDS("Aluminium_data")
#Copper<-readRDS("copper_data")

Norsk Hydro

dateformat <- col_date(format = "%Y%m%d")
Norsk_hydro <- read_csv("https://www.netfonds.no/quotes/paperhistory.php?paper=NHY.OSE&csv_format=csv", col_types = cols(quote_date = dateformat))
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 74 parsing failures.
## row # A tibble: 5 x 5 col     row col   expected   actual   file                                     expected   <int> <chr> <chr>      <chr>    <chr>                                    actual 1  1269 value an integer 1297767~ 'https://www.netfonds.no/quotes/paperhi~ file 2  2711 value an integer 2761167~ 'https://www.netfonds.no/quotes/paperhi~ row 3  2806 value an integer 2207866~ 'https://www.netfonds.no/quotes/paperhi~ col 4  2807 value an integer 2309111~ 'https://www.netfonds.no/quotes/paperhi~ expected 5  2808 value an integer 3834113~ 'https://www.netfonds.no/quotes/paperhi~
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
#checking the reason for "Warning: 74 parsing failures"
summary(Norsk_hydro)
##    quote_date            paper               exch          
##  Min.   :1997-09-17   Length:5303        Length:5303       
##  1st Qu.:2003-02-05   Class :character   Class :character  
##  Median :2008-05-19   Mode  :character   Mode  :character  
##  Mean   :2008-05-12                                        
##  3rd Qu.:2013-08-22                                        
##  Max.   :2018-11-30                                        
##                                                            
##       open            high            low            close      
##  Min.   :15.28   Min.   :15.28   Min.   : 2.66   Min.   :15.28  
##  1st Qu.:24.88   1st Qu.:25.07   1st Qu.:24.61   1st Qu.:24.86  
##  Median :31.14   Median :31.50   Median :30.65   Median :31.10  
##  Mean   :35.66   Mean   :36.07   Mean   :35.20   Mean   :35.63  
##  3rd Qu.:43.79   3rd Qu.:44.28   3rd Qu.:43.26   3rd Qu.:43.76  
##  Max.   :86.00   Max.   :86.70   Max.   :84.20   Max.   :85.60  
##                                                                 
##      volume              value          
##  Min.   :    30826   Min.   :5.610e+05  
##  1st Qu.:  5469251   1st Qu.:1.649e+08  
##  Median :  8421994   Median :2.855e+08  
##  Mean   : 11719913   Mean   :3.955e+08  
##  3rd Qu.: 14347290   3rd Qu.:4.833e+08  
##  Max.   :517864304   Max.   :2.131e+09  
##                      NA's   :74
Norsk_hydro <- Norsk_hydro %>% select(c(1,7))
names(Norsk_hydro) <- c("date", "nhy")
glimpse(Norsk_hydro)
## Observations: 5,303
## Variables: 2
## $ date <date> 2018-11-30, 2018-11-29, 2018-11-28, 2018-11-27, 2018-11-...
## $ nhy  <dbl> 40.50, 40.95, 41.40, 41.15, 41.79, 41.05, 41.86, 42.12, 4...
# simple ggvis line graph for overview

Norsk_hydro %>%
  ggvis(~date, ~nhy) %>%
  layer_lines()
print (Norsk_hydro)
## # A tibble: 5,303 x 2
##    date         nhy
##    <date>     <dbl>
##  1 2018-11-30  40.5
##  2 2018-11-29  41.0
##  3 2018-11-28  41.4
##  4 2018-11-27  41.2
##  5 2018-11-26  41.8
##  6 2018-11-23  41.0
##  7 2018-11-22  41.9
##  8 2018-11-21  42.1
##  9 2018-11-20  41.3
## 10 2018-11-19  42.0
## # ... with 5,293 more rows

exchange rate USD/NOK, shows the value of a US dollar in norwigian Crowns

USD<- read_csv("https://www.netfonds.no/quotes/paperhistory.php?paper=USDNOK.FXSX&csv_format=csv ",
               col_types = cols(quote_date = dateformat))
#selectingclose values and date
USD <- USD %>% select(c(1,7)) 
names(USD) <- c("date","NOK_USD")
USD %>% 
    ggvis(~date,~NOK_USD) %>% 
    layer_lines()
# the data seems to be lacking from 2006- 2013- but i am keeping it  for later analysis
#which is somthing you'll notice if you hover your computer mouse of the straight line 
ggplotly(ggplot(USD, aes(date, NOK_USD)) + geom_line(color = "#E7B800"))

interactive graphs

NHY <- xts(Norsk_hydro$nhy, order.by = Norsk_hydro$date)
#usd <- xts(USD$NOK_USD, order.by = USD$date, frequency = 365)
#interactive dygraph over nhy data, value during the 2008 crisis is marked
dygraph(NHY) %>%
  dyOptions(labelsUTC = TRUE, fillGraph=TRUE, fillAlpha=0.1, drawGrid = FALSE, colors="#00AFBB") %>%
  dyRangeSelector(keepMouseZoom = T) %>%
  dyCrosshair(direction = "vertical") %>%
  dyHighlight(highlightCircleSize = 5, highlightSeriesBackgroundAlpha = 0.2, hideOnMouseOut = FALSE)  %>%
  dyRoller(rollPeriod = 1) %>%
  dyEvent("2008-11-20", "Financial crisis", labelLoc = "top")
  # removing 
rm(list = "dateformat")

Specifying the url for desired website to be scrapped the following are a link to westmetall market data

# URL to westmetall market data
url_Al <- "https://www.westmetall.com/en/markdaten.php?action=show_table&field=LME_Al_cash"
url_copper <- "https://www.westmetall.com/en/markdaten.php?action=show_table&field=LME_Cu_stock"
# Reading & checking the HTML code from the website
webpage_A <- read_html(url_Al)
webpage_C <- read_html(url_copper)

Aluminium (in US Dollar per ton)

CSS selectors to scrap desired sections, and creating a tibble

Aluminium<- tibble(date = webpage_A %>%  html_nodes("td:nth-child(1)") %>% html_text %>% 
                    str_replace_all("Febuary","February") %>% anydate,
              Cash_Settlement= webpage_A %>% html_nodes("td:nth-child(2)") %>% html_text %>%
                     str_replace_all("\\.",""),
              LME_3_month = webpage_A %>% html_nodes("td:nth-child(3)") %>% html_text %>%
                     str_replace_all("\\.",""),
              Stock = webpage_A %>% html_nodes("td:nth-child(4)") %>% html_text %>%
                     str_replace_all("\\.", "") %>% as.numeric())

Aluminium$Cash_Settlement<- as.numeric(str_replace_all(Aluminium$Cash_Settlement,",","."))
Aluminium$LME_3_month <-as.numeric(str_replace_all(Aluminium$LME_3_month,",","."))
glimpse(Aluminium)
## Observations: 4,185
## Variables: 4
## $ date            <date> 2018-11-30, 2018-11-29, 2018-11-28, 2018-11-2...
## $ Cash_Settlement <dbl> 1935.0, 1933.0, 1912.5, 1925.0, 1920.0, 1922.5...
## $ LME_3_month     <dbl> 1938.5, 1933.0, 1924.0, 1935.5, 1930.0, 1934.5...
## $ Stock           <dbl> 1052450, 1055275, 1057875, 1061700, 1064975, 1...
#saving the commodidity data
saveRDS(Aluminium, file = "Aluminium_data")

Copper

Copper <- tibble( date = webpage_C %>% html_nodes("td:nth-child(1)") %>% html_text %>%  
                    str_replace_all("Febuary","February") %>% anydate,
          Cash_settlment=webpage_C %>% html_nodes("td:nth-child(2)") %>% html_text %>%
                str_replace_all("\\.",""),
          LME_3_month = webpage_C %>% html_nodes ("td:nth-child(3)") %>% html_text %>%
                str_replace_all("\\.",""),
          Stock = webpage_C %>% html_nodes("td:nth-child(4)") %>% html_text %>%
                str_replace_all("\\.","") %>% as.numeric())
Copper$Cash_settlment <- as.numeric(str_replace_all(Copper$Cash_settlment,",","."))
Copper$LME_3_month <- as.numeric(str_replace_all(Copper$LME_3_month,",","."))
summary(Copper)
##       date            Cash_settlment   LME_3_month        Stock       
##  Min.   :2002-01-02   Min.   : 1421   Min.   : 1445   Min.   : 25525  
##  1st Qu.:2006-03-24   1st Qu.: 4088   1st Qu.: 3968   1st Qu.:158300  
##  Median :2010-06-18   Median : 6305   Median : 6313   Median :276675  
##  Mean   :2010-06-18   Mean   : 5726   Mean   : 5709   Mean   :327280  
##  3rd Qu.:2014-09-11   3rd Qu.: 7371   3rd Qu.: 7366   3rd Qu.:431600  
##  Max.   :2018-11-30   Max.   :10148   Max.   :10124   Max.   :980075
print(Copper)
## # A tibble: 4,273 x 4
##    date       Cash_settlment LME_3_month  Stock
##    <date>              <dbl>       <dbl>  <dbl>
##  1 2018-11-30          6238        6202  136175
##  2 2018-11-29          6282        6241  136250
##  3 2018-11-28          6198        6157  137325
##  4 2018-11-27          6170.       6136. 137850
##  5 2018-11-26          6246        6219  137100
##  6 2018-11-23          6210        6188  139550
##  7 2018-11-22          6246        6224  143325
##  8 2018-11-21          6242        6218  143275
##  9 2018-11-20          6262        6240  145250
## 10 2018-11-19          6260        6245  151625
## # ... with 4,263 more rows
saveRDS(Copper, file = "copper_data")
rm(list = c("webpage_A","webpage_C", "url_Al","url_copper"))

creating interactive graph of the substitutes metals

Almun <- xts(Aluminium$Cash_Settlement, order.by = Aluminium$date, frequency = 365)
Copp <- xts(Copper$Cash_settlment, order.by = Copper$date, frequency =  365)

risk_factors <- cbind(Almun,Copp)

dygraph(risk_factors,ylab="Cash_Settlement", 
        main="Copper and Aluminium Cash_Settlements") %>%
  dySeries("..1",label="Copper") %>%
  dySeries("..2",label="Aluminium") %>% 
  dyOptions(colors = c("blue","brown")) %>%
  dyRangeSelector(dateWindow = c("2002-02-01", "2018-12-01"))
# selecting data for further manipulation
Aluminium <- Aluminium %>% select(c(1:2))
Copper<-Copper %>% select(c(1:2))
names(Aluminium) <- c("date", "Aluminium")
names(Copper) <- c("date", "Copper")
rm(list=c("Almun","Copp", "risk_factors", "NHY"))
#joning the data and commodities  
nhy_sector<-left_join(Norsk_hydro, Aluminium, by = c("date")) %>%
      left_join(.,Copper,by = c("date"))
# some missing data due to lack registrations on this dates 
#and becouse the NHY dat streches further back (several years)
summary(nhy_sector)
##       date                 nhy          Aluminium        Copper     
##  Min.   :1997-09-17   Min.   :15.28   Min.   :1254   Min.   : 1421  
##  1st Qu.:2003-02-05   1st Qu.:24.86   1st Qu.:1658   1st Qu.: 4091  
##  Median :2008-05-19   Median :31.10   Median :1895   Median : 6297  
##  Mean   :2008-05-12   Mean   :35.63   Mean   :1952   Mean   : 5727  
##  3rd Qu.:2013-08-22   3rd Qu.:43.76   3rd Qu.:2202   3rd Qu.: 7370  
##  Max.   :2018-11-30   Max.   :85.60   Max.   :3292   Max.   :10148  
##                                       NA's   :1201   NA's   :1117
nhy_sector <- nhy_sector[complete.cases(nhy_sector), ]
# switiching from wide to long
nhy_long <- nhy_sector %>% gather(key = "stock", value = "value", -date)
nhy_long
## # A tibble: 12,303 x 3
##    date       stock value
##    <date>     <chr> <dbl>
##  1 2018-11-30 nhy    40.5
##  2 2018-11-29 nhy    41.0
##  3 2018-11-28 nhy    41.4
##  4 2018-11-27 nhy    41.2
##  5 2018-11-26 nhy    41.8
##  6 2018-11-23 nhy    41.0
##  7 2018-11-22 nhy    41.9
##  8 2018-11-21 nhy    42.1
##  9 2018-11-20 nhy    41.3
## 10 2018-11-19 nhy    42.0
## # ... with 12,293 more rows
#na here accurs becouse of the close data for NHY streches back to 1997
# and ome few missing value, du to lack of registration
summary(nhy_long)
##       date               stock               value         
##  Min.   :2002-01-02   Length:12303       Min.   :   17.06  
##  1st Qu.:2006-03-02   Class :character   1st Qu.:   46.34  
##  Median :2010-08-18   Mode  :character   Median : 1812.00  
##  Mean   :2010-07-07                      Mean   : 2556.03  
##  3rd Qu.:2014-10-15                      3rd Qu.: 3986.00  
##  Max.   :2018-11-30                      Max.   :10148.00
#arrange in ascending order
nhy_long <- nhy_long %>% arrange(date)
# calculate the returns of each year
names(nhy_sector)
## [1] "date"      "nhy"       "Aluminium" "Copper"
nhy_return <- nhy_sector %>% mutate(., nhy_returns=c(NA, diff(log(nhy))),
                              Aluminium_returns=c(NA, diff(log(Aluminium))),
                              Copper_returns=c(NA, diff(log(Copper))))
# remove the nas
nhy_return <- nhy_return[complete.cases(nhy_return), ]

#
nhy_return <- nhy_return %>% mutate(nhy_cumulative=cumsum(nhy_returns),
                              Aluminium_cumlative=cumsum(Aluminium_returns),
                              Copper_cumulative=cumsum(Copper_returns))
#selecting and adding the returns over time
nhy_long <- nhy_return %>% 
            select(date,nhy_cumulative,Aluminium_cumlative,Copper_cumulative) %>%
            gather(key = "stock",value = "value", -date)

# graph the long dataframe
ggplot(nhy_long, aes(date, value))+ 
  geom_line(aes(color = stock)) +
  scale_color_manual(values = c("#00AFBB", "#E7B800","#CC0000"))

#changing to xts format
nhy_cumulative <- xts(nhy_return$nhy_cumulative, order.by = nhy_return$date)
Aluminium_cumulative <- xts(nhy_return$Aluminium_cumlative, order.by = nhy_return$date)
Copper_cumulative <- xts(nhy_return$Copper_cumulative, order.by = nhy_return$date)
#combining columns
cumulative_stocks <- cbind(nhy_cumulative,Aluminium_cumulative,Copper_cumulative)
#creating interactive graph
cum_dygraph <-dygraph(cumulative_stocks, ylab="cumulative", 
        main="Cumulative data") %>%
  dySeries("..1",label="NHY") %>%
  dySeries("..2",label="Aluminium") %>% 
  dySeries("..3",label ="Copper") %>%
  dyOptions(colors = c("lightblue","brown", "orange")) %>%
  dyEvent("2008-11-20", "Financial crisis", labelLoc = "bottom") %>%
  dyShading( "2008-1-1", "2009-6-1","#FFE6E6") %>%
  dyShading("2018-1-1", "2018-11-1") %>%
  dyRangeSelector(dateWindow = c("2006-02-01", "2018-12-01"))
cum_dygraph
#removing dataframes, since the information is saved within the return dataframe
rm(list=c("cumlative_stocks","nhy_cumulative","Aluminium_cumulative","Copper_cumulative",
            "Aluminium","Copper","nhy_sector","Norsk_hydro"))
## Warning in rm(list = c("cumlative_stocks", "nhy_cumulative",
## "Aluminium_cumulative", : object 'cumlative_stocks' not found
# confirming what the obvious , through linaer regression
nhy_return %>% 
  ggvis(~nhy, ~Aluminium) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", se = TRUE)
## Guessing formula = Aluminium ~ nhy